* Work from the project folder; all file names below are relative to it.
cd "D:\cninfo\program\Public\annuals"

* Load the listing file, taking variable names from its first row and
* discarding whatever data are currently in memory.
insheet using "results.csv", clear names

* v1 is presumably an unnamed leading column in the CSV - TODO confirm.
drop v1

* The rest of the script refers to the stock code as stkcd.
rename code stkcd

* File type: split url on "." into url1, url2, ... and keep the fifth
* piece as type. The hard-coded piece count assumes every url has the
* same layout - TODO confirm against the data.
split url, parse(".")
rename url5 type
drop url1-url4
* Optional filter on the file type, currently disabled:
*keep if type == "PDF"

* File name: split url on "/" and keep the sixth piece as filename.
split url, parse("/")
rename url6 filename
drop url1-url5

* Drop every record whose title contains one of the keywords below.
* Each keyword is wrapped in wildcards so it matches anywhere in title.
foreach kw in 摘要 英文 半年 季度 补充 H股 公告 说明 规则 审计 形象版 广告版 {
	drop if strmatch(title, "*`kw'*")
}
* Filter on one further keyword, currently disabled:
*drop if strmatch(title, "*取消*")

* One firm-specific exclusion: for this company name, drop titles that
* match the two-part wildcard pattern.
drop if name == "中房股份" & strmatch(title, "*秋*林*")

* Report year: the first run of four digits found in the title.
gen year = real(ustrregexs(0)) if ustrregexm(title, "\d{4}")

* Announcement year: four characters of url starting at position 36.
* NOTE(review): assumes the date always sits at that fixed offset in
* url - confirm against the data.
gen announce = real(substr(url, 36, 4))

* Manual fixes for titles whose year is not written as four plain digits
* (letter O used for zero, mixed Chinese numerals, or a truncated year).
replace year = 2007 if strmatch(title, "*2OO7*") | strmatch(title, "*二00七*")
replace year = 2005 if strmatch(title, "*005*") & missing(year)

* Records with no recoverable report year cannot be keyed by firm-year.
drop if missing(year)

* Alternative, looser timing rule, currently disabled:
*drop if (announce - 1)>year

* Keep only reports announced in a later calendar year than the report
* year. Stata treats a missing value as larger than any number, so a
* bare "announce > year" would also keep rows whose announcement year
* could not be parsed; those rows are excluded explicitly.
keep if announce > year & !missing(announce)
drop announce

* Announcement date: ten characters of url starting at position 36.
* NOTE(review): assumes the date always sits at that fixed offset and
* sorts chronologically as text - confirm against the data.
gen date = substr(url, 36, 10)
order stkcd name year date

* Keep one record per firm-year: the one with the smallest date.
* The stable option keeps records that tie on stkcd, year and date in
* their current order, so the record that survives is the same on every
* run (a plain sort orders ties arbitrarily).
sort stkcd year date, stable
* To inspect how many candidates each firm-year had, enable:
*bysort stkcd year: gen X = _N
by stkcd year: keep if _n == 1

* Declare the firm-year panel, then shrink storage types.
xtset stkcd year
compress

* Display label: stock code left-padded with zeros to six characters,
* followed by year, company name and title, joined with "-".
tempvar code6
gen `code6' = "0"*(6-strlen(strofreal(stkcd)))+strofreal(stkcd)
gen text = `code6'+"-"+strofreal(year)+"-"+name+"-"+title
drop `code6'

* Listing board from the numeric stock-code range. The three ranges do
* not overlap, so the order of the replace statements does not matter;
* any code outside them keeps the default label.
gen board = "深证主板"
replace board = "上证主板" if stkcd >= 600000
replace board = "创业板" if stkcd >= 300000 & stkcd < 400000
replace board = "中小板" if stkcd >= 2000 & stkcd < 3000

* One group per year-board combination, plus an integer code g for it.
gen group = strofreal(year) + "-" + board
encode group, generate(g)

* Snapshot the cleaned listing before it is filtered against the
* missing list.
save "annuals_missing", replace

* Keep only the firm-years that also appear in missing_list; matched
* rows only, and no _merge variable is left behind.
merge 1:1 stkcd year using "missing_list", keep(match) nogenerate

* Add the rows already stored in annuals, then restore the panel
* ordering and declaration. xtset will stop with an error if the
* combined data contain a repeated stkcd-year pair.
append using "annuals"
sort stkcd year
xtset stkcd year
compress

* Restrict to report years from 2007 onward and overwrite annuals with
* the combined result.
keep if year >= 2007
save "annuals", replace

* Write one .dta file and one .xlsx file per value of g, each named
* after the numeric code. The codes are read from the data rather than
* assumed to be 1 to 55, so no group is skipped and no empty file is
* written when the number of groups changes.
* NOTE(review): g was created by encode before "append using annuals";
* confirm the appended rows carry codes from the same encoding.
quietly levelsof g, local(groups)
foreach i of local groups {
	* Work on a copy so the full dataset is back after each export.
	preserve
	keep if g == `i'
	drop g board
	save "`i'.dta", replace
	export excel using "`i'.xlsx", firstrow(variables) replace
	restore
}

* Close Stata, discarding the data in memory.
exit, clear
